from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

def text_splitter(path, *, chunk_size: int = 50, chunk_overlap: int = 10) -> list:
    """Load a UTF-8 text file and split it into overlapping chunks.

    The file is read with ``TextLoader`` and split with
    ``RecursiveCharacterTextSplitter`` using paragraph, line, and
    Chinese/English punctuation separators, tried in that order.

    Args:
        path: Location of the text file to load (decoded as UTF-8).
        chunk_size: Target maximum chunk length, measured with ``len``
            (characters). Must be positive.
        chunk_overlap: Number of characters shared between consecutive
            chunks. Must satisfy ``0 <= chunk_overlap < chunk_size``.

    Returns:
        The list of document chunks produced by ``split_documents``.

    Raises:
        ValueError: If ``chunk_size`` or ``chunk_overlap`` is out of range.

    Any error raised by the loader while reading the file is propagated
    unchanged.
    """
    # Validate up front so a bad configuration fails before any file I/O.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # An overlap equal to the chunk size is accepted by the splitter but
    # leaves almost nothing to drop between chunks, so consecutive chunks
    # come out as near-duplicates. Require a strictly smaller overlap.
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError(
            "chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size, "
            f"got chunk_overlap={chunk_overlap}, chunk_size={chunk_size}"
        )

    loader = TextLoader(path, encoding="utf-8")

    # Coarse-to-fine separators: paragraph, line, then Chinese and English
    # sentence/clause punctuation. They are matched literally, not as regex.
    separators = ["\n\n", "\n", "。", "？", "！", "，", ".", "?", "!", ","]

    # Named `splitter` so the local does not shadow this function's own name.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
        separators=separators,
    )

    return splitter.split_documents(loader.load())
